
## =========================================================
## Replication Script (Cleaned) — Vice Presidential Conjoint
## Author: Emma Schroeder & Alejandra Campos
## Purpose: Repro R Script & Figures 
## =========================================================

## -------- Repro & package setup --------
if (!requireNamespace("here", quietly = TRUE)) install.packages("here")
if (!requireNamespace("pacman", quietly = TRUE)) install.packages("pacman")
if (!requireNamespace("conflicted", quietly = TRUE)) install.packages("conflicted")

pacman::p_load(
  tidyverse, dplyr, tidyr, stringr, forcats, readr,
  ggplot2, ggrepel, patchwork, scales, lattice,
  psych, Hmisc, car,
  cjoint, cregg, marginaleffects, broom, broom.helpers,
  survey, ggeffects, stargazer
)

library(conflicted)
conflicts_prefer(
  dplyr::filter, dplyr::lag, dplyr::select, dplyr::mutate,
  dplyr::summarise, dplyr::group_by, tidyr::pivot_longer, tidyr::pivot_wider
)

set.seed(1234)
theme_set(theme_minimal(base_size = 12) +
            theme(panel.grid.minor = element_blank(),
                  plot.title.position = "plot"))

label_pp   <- scales::label_percent(accuracy = 1)
label_amce <- function(x) scales::percent(x, accuracy = 0.1)
drop_color_scale <- function() ggplot2::guides(color = "none")

# Create File for Figures in working directory
fig_dir <- file.path(getwd(), "figures")
if (!dir.exists(fig_dir)) dir.create(fig_dir)


## Palettes used across plots
party_pal <- c("Democrat"   = "#2E86AB",
               "Independent"= "#7E7F9A",
               "Republican"  = "#D1495B",
               "NA"          = "#9E9E9E")

gender_pal <- c("Female" = "#7B6D8D", "Male" = "#2C6E49")

race_pal   <- c("White" = "#6C757D", "Black" = "#1F77B4",
                "Latinx" = "#D62728", "Asian" = "#2CA02C",
                "American Indian / Alaska Native" = "#9467BD",
                "Native Hawaiian / Pacific Islander" = "#8C564B",
                "Multiracial" = "#E377C2")

## Helper: safe save
save_plot <- function(p, filename, width = 6, height = 4, dpi = 320) {
  outdir <- here::here("figures")
  if (!dir.exists(outdir)) dir.create(outdir, recursive = TRUE)
  ggplot2::ggsave(file.path(outdir, filename), plot = p, width = width, height = height, dpi = dpi)
  message("Saved: figures/", filename)
}

## -------- Data --------
data_path <- here::here("vpdata.csv")
if (!file.exists(data_path)) {
  stop("Data file 'vpdata.csv' not found at: ", data_path,
       "\nPlace the CSV next to this script or adjust 'data_path'.")
}
df <- read.csv(data_path, stringsAsFactors = FALSE)

## -------- Scales: Sexism --------
sex_items <- c("sexism_1", "sexism_2", "sexism_3", "sexism_4", "sexism_5")
stopifnot(all(sex_items %in% names(df)))
sex_mat <- df[, sex_items]
sex_alpha <- psych::alpha(sex_mat)

# Summated score (NA=0 behavior mimics original na.rm=TRUE sums)
df$sexism <- rowSums(sex_mat, na.rm = TRUE)

# Lattice item histograms (kept from original)
lattice::histogram(~as.factor(sexism_1), data = df, aspect = 1, xlab = "Women at Home")
lattice::histogram(~as.factor(sexism_2), data = df, aspect = 1, xlab = "Special Favors")
lattice::histogram(~as.factor(sexism_3), data = df, aspect = 1, xlab = "Women Complain")
lattice::histogram(~as.factor(sexism_4), data = df, aspect = 1, xlab = "Protected by Men")
lattice::histogram(~as.factor(sexism_5), data = df, aspect = 1, xlab = "Men Better for Politics")

## -------- Scales: Racial Resentment --------
rr_items <- c("racial_resentment_1", "racial_resentment_2",
              "racial_resentment_3", "racial_resentment_4")
stopifnot(all(rr_items %in% names(df)))

rev_15 <- function(x) ifelse(is.na(x), NA, 6 - x)

rr_df <- df[, rr_items] %>%
  dplyr::mutate(
    lowerclass = rev_15(racial_resentment_2),
    deserve    = rev_15(racial_resentment_3)
  ) %>%
  dplyr::select(racial_resentment_1, lowerclass, deserve, racial_resentment_4)

rr_alpha <- psych::alpha(rr_df)
df$racism <- rowSums(rr_df, na.rm = TRUE)

# --- Robust rebuild of respondent gender (and fix NA->Female) ---

# helper: numeric conversion that first strips factor -> character
to_num <- function(x) suppressWarnings(as.numeric(as.character(x)))

df <- df %>%
  mutate(
    gender_chr = tolower(trimws(as.character(gender))),   # preserve original info
    # Build a 0/1 binary from multiple possible sources:
    gender_bin = case_when(
      !is.na(gender_1) & to_num(gender_1) %in% c(0,1) ~ to_num(gender_1),   # your dummy if present
      gender_chr %in% c("female","f","0","2") ~ 0,                          # 0 or 2 coded Female in your earlier script
      gender_chr %in% c("male","m","1") ~ 1,                                 # 1 coded Male
      TRUE ~ NA_real_
    ),
    # First pass factor
    gender_factor = case_when(
      gender_bin == 0 ~ "Female",
      gender_bin == 1 ~ "Male",
      TRUE ~ NA_character_
    )
  )


df$gender_factor[is.na(df$gender_factor)] <- "Female"
df$gender_factor <- factor(df$gender_factor, levels = c("Female","Male"))

# sanity check
table(df$gender_factor, useNA = "ifany")


## -------- Recodes --------

# Gender --> gender factor
df <- df %>%
  dplyr::mutate(
    gender = as.numeric(gender),
    gender_1 = dplyr::case_when(
      gender == 1 ~ 1,   # male
      gender == 0 ~ 0,   # female
      TRUE ~ NA_real_
    ),
    gender_factor = factor(ifelse(gender_1 == 1, "Male", "Female"),
                           levels = c("Female", "Male"))
  )

# PID 3-category --> partyid factor

df <- df %>%
  dplyr::mutate(
    pid3 = as.numeric(pid3),
    partyid_num = dplyr::case_when(
      pid3 == 1 ~ 1,   # Democrat
      pid3 == 2 ~ 3,   # Republican
      pid3 == 3 ~ 2,   # Independent
      pid3 %in% c(4,5) ~ NA_real_,
      TRUE ~ NA_real_
    ),
    partyid = factor(partyid_num,
                     levels = c(1,2,3),
                     labels = c("Democrat","Independent","Republican"))
  )

# Race collapsing to 7 categories including Multiracial
df <- df %>%
  dplyr::mutate(
    race_1_num = dplyr::case_when(
      race %in% c("1,3","1,5","1,3,4","1,2","1,6","1,4","1,2,3,5","1,3,5","1,4,5") ~ 7,
      race == "1" ~ 1, # White
      race == "2" ~ 2, # Black
      race == "3" ~ 3, # Latinx
      race == "4" ~ 4, # AI/AN
      race == "5" ~ 5, # Asian
      race == "6" ~ 6, # NH/PI
      TRUE ~ NA_real_
    ),
    race_1 = factor(race_1_num,
                    levels = 1:7,
                    labels = c("White","Black","Latinx","American Indian / Alaska Native",
                               "Asian","Native Hawaiian / Pacific Islander","Multiracial")),
    nonwhite = factor(ifelse(race_1 == "White", "White", "Nonwhite"),
                      levels = c("White","Nonwhite"))
  )

## Candidate attribute factors & baseline levels
fac_or_chr <- function(x) if (is.character(x)) factor(x) else factor(as.character(x))
df <- df %>%
  dplyr::mutate(
    candidate_race       = fac_or_chr(candidate_race),
    candidate_ideology   = fac_or_chr(candidate_ideology),
    candidate_experience = fac_or_chr(candidate_experience),
    candidate_region     = fac_or_chr(candidate_region),
    candidate_gender     = fac_or_chr(candidate_gender)
  ) %>%
  dplyr::mutate(
    candidate_experience = forcats::fct_relevel(candidate_experience, "Member of House of Representatives"),
    candidate_race       = forcats::fct_relevel(candidate_race, "White"),
    candidate_ideology   = forcats::fct_relevel(candidate_ideology, "Moderate"),
    candidate_gender     = forcats::fct_relevel(candidate_gender, "Male"),
    candidate_region     = forcats::fct_relevel(candidate_region, "Northeast"),
    Experience = candidate_experience,
    Race       = candidate_race,
    Ideology   = candidate_ideology,
    Gender     = candidate_gender,
    Region     = candidate_region
  )

## -------- Analysis Subsets (create BEFORE modeling) --------
female <- dplyr::filter(df, gender_factor == "Female")
male   <- dplyr::filter(df, gender_factor == "Male")

white_df    <- dplyr::filter(df, nonwhite == "White")
nonwhite_df <- dplyr::filter(df, nonwhite == "Nonwhite")

dem_df <- dplyr::filter(df, partyid == "Democrat")
rep_df <- dplyr::filter(df, partyid == "Republican")

## -------- OLS Examples --------
m1 <- lm(outcome ~ candidate_gender, data = female)
m2 <- lm(outcome ~ candidate_gender, data = male)
stargazer::stargazer(m1, m2, type = "text", title = "OLS by respondent gender")

m3 <- lm(outcome ~ candidate_gender + nonwhite + age + income, data = female)
m4 <- lm(outcome ~ candidate_gender + nonwhite + age + income, data = male)
stargazer::stargazer(m3, m4, type = "text", title = "OLS with controls by gender")

## -------- Conjoint AMCE (cjoint) --------
stopifnot("ResponseId" %in% names(df))

m_amce_all <- cjoint::amce(
  outcome ~ Race + Ideology + Experience + Region + Gender,
  data = df, respondent.id = "ResponseId", cluster = TRUE
)
print(summary(m_amce_all))

pdf(file.path(fig_dir, "AMCE_overall.pdf"), width = 7, height = 5)
plot(m_amce_all, xlab = "Change in Pr(Vote for Candidate)",
     ci = 0.95, breaks = c(-0.2, 0, 0.2), text.size = 11, point.size = 0.6)
dev.off()

## By respondent race group
m_amce_nonwhite <- cjoint::amce(outcome ~ Race + Ideology + Experience + Region + Gender,
                                data = nonwhite_df, respondent.id = "ResponseId")
m_amce_white    <- cjoint::amce(outcome ~ Race + Ideology + Experience + Region + Gender,
                                data = white_df, respondent.id = "ResponseId")

pdf(file.path(fig_dir, "AMCE_by_racegroup.pdf"), width = 10, height = 4)
par(mfrow = c(1,2))
plot(m_amce_nonwhite, xlab = "Δ Pr(Select)", ci = 0.95,
     breaks = c(-0.2, 0, 0.2), text.size = 9, point.size = 0.4)
plot(m_amce_white, xlab = "Δ Pr(Select)", ci = 0.95,
     breaks = c(-0.2, 0, 0.2), text.size = 9, point.size = 0.4)
par(mfrow = c(1,1))
dev.off()

## By respondent gender
m_amce_f <- cjoint::amce(outcome ~ Race + Ideology + Experience + Region + Gender,
                         data = female, respondent.id = "ResponseId")
m_amce_m <- cjoint::amce(outcome ~ Race + Ideology + Experience + Region + Gender,
                         data = male, respondent.id = "ResponseId")

pdf(file.path(fig_dir, "AMCE_by_gender.pdf"), width = 10, height = 4)
par(mfrow = c(1,2))
plot(m_amce_f, xlab = "Δ Pr(Select)", ci = 0.95,
     breaks = c(-0.2, 0, 0.2), text.size = 9, point.size = 0.4)
plot(m_amce_m, xlab = "Δ Pr(Select)", ci = 0.95,
     breaks = c(-0.2, 0, 0.2), text.size = 9, point.size = 0.4)
par(mfrow = c(1,1))
dev.off()

## -------- Interactions within AMCE --------
m_int_gender_race <- cjoint::amce(
  outcome ~ candidate_race + candidate_ideology + candidate_experience +
    candidate_gender + candidate_gender:candidate_race,
  data = df, cluster = FALSE
)

pdf(here::here("figures", "ACIE_gender_x_race.pdf"), width = 7, height = 5)
plot(m_int_gender_race, facet.names = "candidate_gender",
     main = "ACIE: Gender × Race")
dev.off()

## -------- Marginal Means (cregg & marginaleffects) --------
model_candidate_amce <- cregg::amce(
  data = df,
  formula = outcome ~ candidate_race + candidate_gender + candidate_ideology +
    candidate_region + candidate_experience,
  id = ~ ResponseId
)

# Basic plot (cregg)
gg1 <- plot(model_candidate_amce) +
  drop_color_scale() +
  labs(title = "AMCEs from cregg::amce()")
save_plot(gg1, "cregg_amce.png", width = 7, height = 5)

## Survey-style GLM for MMs (quasi-binomial to be robust if outcome is 0/1)
candidate_svy_design <- survey::svydesign(ids = ~ ResponseId, weights = ~ 1, data = df)

model_svy <- survey::svyglm(
  outcome ~ candidate_race + candidate_gender + candidate_ideology +
    candidate_region + candidate_experience,
  design = candidate_svy_design,
  family = quasibinomial()
)

# Marginal means from GLM
mm_mfx <- marginaleffects::marginal_means(
  model_svy,
  newdata = c("candidate_region", "candidate_race", "candidate_gender",
              "candidate_ideology", "candidate_experience"),
  wts = "cells"
)
mm_df <- as.data.frame(mm_mfx)

# Example: manual check of means by candidate_race
means_by_race <- df %>%
  dplyr::group_by(candidate_race) %>%
  dplyr::summarise(avg = mean(outcome), .groups = "drop")

# Plot of marginal means for each attribute block
variable_lookup <- tibble::tribble(
  ~variable,                    ~variable_nice,
  "candidate_region",           "Region",
  "candidate_race",             "Race",
  "candidate_gender",           "Gender",
  "candidate_experience",       "Experience",
  "candidate_ideology",         "Ideology"
) %>% dplyr::mutate(variable_nice = forcats::fct_inorder(variable_nice))

plot_mm_mfx <- mm_df %>%
  dplyr::as_tibble() %>%
  dplyr::mutate(value = forcats::fct_inorder(value)) %>%
  dplyr::left_join(variable_lookup, by = dplyr::join_by(term == variable)) %>%
  dplyr::mutate(across(c(value, variable_nice), ~forcats::fct_inorder(.)))

gg2 <- ggplot(
  plot_mm_mfx,
  aes(x = estimate, y = value, color = variable_nice)
) +
  geom_vline(xintercept = 0.5) +
  geom_pointrange(aes(xmin = conf.low, xmax = conf.high)) +
  scale_x_continuous(labels = scales::label_percent()) +
  drop_color_scale() +
  labs(
    x = "Marginal mean",
    y = NULL,
    title = "Marginal Means by Attribute"
  ) +
  ggforce::facet_col(facets = "variable_nice", scales = "free_y", space = "free")

save_plot(gg2, "marginal_means.png", width = 8, height = 8)

## -------- Subgroup MM: candidate_gender by respondent gender --------
# Build interaction in GLM
model_gender <- stats::glm(
  outcome ~ candidate_gender * gender_factor,
  data = df, family = quasibinomial()
)

party_mms_mfx <- marginaleffects::marginal_means(
  model_gender,
  newdata = c("candidate_gender", "gender_factor"),
  cross = TRUE, wts = "cells"
) %>% as.data.frame()

gg3 <- party_mms_mfx %>%
  dplyr::mutate(estimate_nice = dplyr::if_else(estimate != 0, scales::label_percent()(estimate), NA_character_)) %>%
  ggplot(aes(x = estimate, y = candidate_gender, color = gender_factor)) +
  geom_vline(xintercept = 0.5) +
  geom_pointrange(aes(xmin = conf.low, xmax = conf.high)) +
  scale_x_continuous(labels = scales::label_percent()) +
  scale_color_manual(values = gender_pal) +
  labs(x = "Marginal means", y = NULL, color = NULL, title = "Marginal means by respondent gender") +
  theme(legend.position = "bottom", legend.justification = "left")
save_plot(gg3, "mm_by_gender.png", width = 7, height = 5)

---- # double check figure path & packages are loaded -----------


fig_dir <- file.path(getwd(), "figures")
if (!dir.exists(fig_dir)) dir.create(fig_dir, recursive = TRUE)


if (!requireNamespace("marginaleffects", quietly = TRUE)) install.packages("marginaleffects")
if (!requireNamespace("ggforce", quietly = TRUE)) install.packages("ggforce")

# Recreate the survey model used for MM
candidate_svy_design <- survey::svydesign(ids = ~ ResponseId, weights = ~ 1, data = df)

model_svy <- survey::svyglm(
  outcome ~ candidate_race + candidate_gender + candidate_ideology +
    candidate_region + candidate_experience,
  design = candidate_svy_design,
  family = quasibinomial()
)

#  Marginal means
mm_mfx <- marginaleffects::marginal_means(
  model_svy,
  newdata = c("candidate_region", "candidate_race", "candidate_gender",
              "candidate_ideology", "candidate_experience"),
  wts = "cells"
)
mm_df <- as.data.frame(mm_mfx)

variable_lookup <- tibble::tribble(
  ~variable,                    ~variable_nice,
  "candidate_region",           "Region",
  "candidate_race",             "Race",
  "candidate_gender",           "Gender",
  "candidate_experience",       "Experience",
  "candidate_ideology",         "Ideology"
) |> dplyr::mutate(variable_nice = forcats::fct_inorder(variable_nice))

plot_mm_mfx <- mm_df |>
  dplyr::as_tibble() |>
  dplyr::mutate(value = forcats::fct_inorder(value)) |>
  dplyr::left_join(variable_lookup, by = dplyr::join_by(term == variable)) |>
  dplyr::mutate(across(c(value, variable_nice), ~forcats::fct_inorder(.)))

# Build the MM plot
mm_plot <- ggplot(
  plot_mm_mfx,
  aes(x = estimate, y = value)
) +
  geom_vline(xintercept = 0.5) +
  geom_pointrange(aes(xmin = conf.low, xmax = conf.high)) +
  scale_x_continuous(labels = scales::label_percent()) +
  labs(
    x = "Marginal mean",
    y = NULL,
    title = "Marginal Means by Attribute"
  ) +
  ggforce::facet_col(facets = "variable_nice", scales = "free_y", space = "free")

# Save as PNG and PDF
ggplot2::ggsave(file.path(fig_dir, "marginal_means.png"), mm_plot, width = 8, height = 8, dpi = 320)
ggplot2::ggsave(file.path(fig_dir, "marginal_means.pdf"), mm_plot, width = 8, height = 8)
message("Saved: ", file.path(fig_dir, "marginal_means.png"))
message("Saved: ", file.path(fig_dir, "marginal_means.pdf"))

# Subgroup MM (candidate_gender by respondent gender), also PNG + PDF
model_gender <- stats::glm(
  outcome ~ candidate_gender * gender_factor,
  data = df, family = quasibinomial()
)

mm_gender <- marginaleffects::marginal_means(
  model_gender,
  newdata = c("candidate_gender", "gender_factor"),
  cross = TRUE, wts = "cells"
) |> as.data.frame()

mm_gender_plot <- mm_gender |>
  dplyr::mutate(estimate_lab = ifelse(estimate != 0, scales::label_percent()(estimate), NA)) |>
  ggplot(aes(x = estimate, y = candidate_gender, color = gender_factor)) +
  geom_vline(xintercept = 0.5) +
  geom_pointrange(aes(xmin = conf.low, xmax = conf.high)) +
  scale_x_continuous(labels = scales::label_percent()) +
  scale_color_manual(values = c("Female"="#7B6D8D","Male"="#2C6E49")) +
  labs(x = "Marginal means", y = NULL, color = NULL, title = "Marginal means by respondent gender") +
  theme(legend.position = "bottom", legend.justification = "left")

ggplot2::ggsave(file.path(fig_dir, "mm_by_gender.png"), mm_gender_plot, width = 7, height = 5, dpi = 320)
ggplot2::ggsave(file.path(fig_dir, "mm_by_gender.pdf"), mm_gender_plot, width = 7, height = 5)
message("Saved: ", file.path(fig_dir, "mm_by_gender.png"))
message("Saved: ", file.path(fig_dir, "mm_by_gender.pdf"))


## -------- End of Script --------
message("Finished. Figures saved under ./figures/")
